The original code from Statistical Rethinking is shown below.
Code from Statistical Rethinking modified by R Pruim is shown below. Differences to the oringal include:
lattice and ggplot2 rather than base graphicstidyverse for data transformationpos <- replicate(1000, sum(runif(16, -1, 1)))
prod(1 + runif(12, 0, 0.1))
## [1] 1.707847
growth <- replicate(10000, prod(1 + runif(12, 0, 0.1)))
densityplot(~ growth)
big <- replicate(10000, prod(1 + runif(12, 0, 0.5)))
small <- replicate(10000, prod(1 + runif(12, 0, 0.01)))
log.big <- replicate(10000, log(prod(1 + runif(12, 0, 0.5))))
w <- 6
n <- 9
Water9 <-
expand.grid(
p = seq(from = 0, to = 1, by = 0.01)) %>%
mutate(prior = dunif(p, 0, 1),
likelihood = dbinom(w, n, p),
posterior.raw = prior * likelihood,
posterior1 = posterior.raw / sum(posterior.raw),
posterior = posterior1 / 0.01
)
xyplot(prior + posterior ~ p, data = Water9, type = "l")
library(rethinking)
data(Howell1)
str(Howell1)
## 'data.frame': 544 obs. of 4 variables:
## $ height: num 152 140 137 157 145 ...
## $ weight: num 47.8 36.5 31.9 53 41.3 ...
## $ age : num 63 63 65 41 51 35 32 27 19 54 ...
## $ male : int 1 0 0 1 0 1 0 1 0 1 ...
glimpse(Howell1)
## Observations: 544
## Variables: 4
## $ height <dbl> 151.7650, 139.7000, 136.5250, 156.8450, 145.4150, 163.8...
## $ weight <dbl> 47.82561, 36.48581, 31.86484, 53.04191, 41.27687, 62.99...
## $ age <dbl> 63.0, 63.0, 65.0, 41.0, 51.0, 35.0, 32.0, 27.0, 19.0, 5...
## $ male <int> 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1...
Howell1$height
HowellAdults <- Howell1 %>% filter(age >= 18)
plotDist("norm", mean = 178, sd = 20)
plotDist("unif", min = 0, max = 50)
PriorSample <- data_frame(
mu = rnorm(1e4, 178, 20),
sigma = runif(1e4, 0, 50),
height = rnorm(1e4, mu, sigma)
)
densityplot( ~ height, data = PriorSample)
llnorm <- function(x, mu, sigma) {
sum(dnorm(x, mean = mu, sd = sigma, log = TRUE))
}
# use all of x for each mu-sigma combo
llnorm <- Vectorize(llnorm, c("mu", "sigma"))
HowellAdultsGrid <-
expand.grid(
mu = seq(from = 140, to = 160, length.out = 200),
sigma = seq(from = 4, to = 9, length.out = 200)) %>%
mutate(
logprior =
dnorm(mu, 178, 20, log = TRUE) +
dunif(sigma, 0, 50, log = TRUE),
loglik = llnorm(HowellAdults$height, mu, sigma),
posterior.log = logprior + loglik,
posterior.raw = exp(posterior.log - max(posterior.log)),
posterior = posterior.raw / sum(posterior.raw)
)
contourplot(posterior ~ mu + sigma, data = HowellAdultsGrid)
levelplot(posterior ~ mu + sigma, data = HowellAdultsGrid, contour = TRUE)
levelplot(posterior.log ~ mu + sigma, data = HowellAdultsGrid, contour = TRUE)
## R code 4.17
PosteriorSample <-
sample(HowellAdultsGrid, size = 1e4, replace = TRUE,
prob = HowellAdultsGrid$posterior)
xyplot(sigma ~ mu, data = PosteriorSample,
cex = 0.5,
pch = 16,
col = col.alpha(rangi2, 0.1)
)
ggplot(PosteriorSample, aes(x = mu, y = sigma)) +
geom_point(alpha = 0.2) +
geom_density2d()
ggplot(PosteriorSample, aes(x = mu, y = sigma)) +
geom_hex()
densityplot( ~ mu, data = PosteriorSample)
densityplot( ~ sigma, data = PosteriorSample)
HPDI(PosteriorSample$mu)
## |0.89 0.89|
## 153.8693 155.1759
HPDI(PosteriorSample$sigma)
## |0.89 0.89|
## 7.266332 8.195980
# sample only heights
sample(HowellAdults$height, size = 20)
## [1] 141.6050 145.4150 145.4150 157.4800 160.0200 163.8300 149.2250
## [8] 148.5900 147.9550 150.4950 144.7800 154.9400 147.3200 145.4150
## [15] 155.5750 160.0200 140.3350 160.9598 148.5900 160.6550
# sample complete rows
sample(HowellAdults, size = 20)
## height weight age male orig.id
## 270 160.700 46.30000 31 1 270
## 84 162.560 45.95454 35 1 84
## 188 152.400 41.16347 77 1 188
## 252 157.480 47.57046 43 1 252
## 148 138.430 39.09396 23 0 148
## 328 163.830 46.77667 21 1 328
## 51 145.415 45.64270 23 0 51
## 217 153.035 49.89000 88 1 217
## 163 149.225 35.80542 82 1 163
## 305 163.830 55.39492 43 1 305
## 339 148.590 35.89047 70 0 339
## 164 154.940 45.21745 28 1 164
## 1 151.765 47.82561 63 1 1
## 219 152.400 43.85668 33 1 219
## 248 148.590 42.52425 35 0 248
## 205 160.020 44.62211 68 1 205
## 73 170.180 48.56269 41 1 73
## 95 161.925 41.73046 29 1 95
## 14 149.900 47.70000 20 0 14
## 97 149.225 42.15571 27 0 97
llnorm <- function(x, mu, sigma) {
sum(dnorm(x, mean = mu, sd = sigma, log = TRUE))
}
# use all of x for each mu-sigma combo
llnorm <- Vectorize(llnorm, c("mu", "sigma"))
Howell20 <- sample(HowellAdults, 20)
Howell20Grid <-
expand.grid(
mu = seq(from = 140, to = 160, by = 0.1),
sigma = seq(from = 2, to = 12, by = 0.1)) %>%
mutate(
logprior =
dnorm(mu, 178, 20, log = TRUE) +
dunif(sigma, 0, 50, log = TRUE),
loglik = llnorm(Howell20$height, mu, sigma),
posterior.log = logprior + loglik,
posterior.raw = exp(posterior.log - max(posterior.log)),
posterior = posterior.raw / sum(posterior.raw)
)
Howell20Posterior <-
sample(Howell20Grid, size = 1e4,
replace = TRUE,
prob = Howell20Grid$posterior)
xyplot(sigma ~ mu, data = Howell20Posterior,
cex = 0.5, alpha = 0.1,
xlab = expression(mu), ylab = expression(sigma),
pch = 16
)
ggplot(Howell20Posterior, aes(x = mu, y = sigma)) +
geom_point(alpha = 0.1) +
geom_density2d() +
labs(x = expression(mu), y = expression(sigma))
densityplot( ~ sigma, data = Howell20Posterior)
histogram( ~ sigma, data = Howell20Posterior, width = 0.1, fit = "normal")
xqqmath( ~ sigma, data = Howell20Posterior)
library(rethinking)
data(Howell1)
HowellAdults <- Howell1 %>% filter(age >= 18)
flist <-
alist(
height ~ dnorm(mu, sigma),
mu ~ dnorm(178, 20),
sigma ~ dunif(0, 50))
m4.1 <- map(flist, data = HowellAdults)
precis(m4.1)
## Mean StdDev 5.5% 94.5%
## mu 154.61 0.41 153.95 155.27
## sigma 7.73 0.29 7.27 8.20
start <-
list(
mu = mean( ~ height, data = HowellAdults),
sigma = sd( ~ height, data = HowellAdults))
m4.2 <- map(alist(height ~ dnorm(mu, sigma),
mu ~ dnorm(178, 0.1),
sigma ~ dunif(0, 50)),
data = HowellAdults)
precis(m4.2)
## Mean StdDev 5.5% 94.5%
## mu 177.86 0.10 177.70 178.02
## sigma 24.52 0.93 23.03 26.00
vcov(m4.1)
## mu sigma
## mu 0.1697395835 0.0002181093
## sigma 0.0002181093 0.0849057890
diag(vcov(m4.1))
## mu sigma
## 0.16973958 0.08490579
cov2cor(vcov(m4.1))
## mu sigma
## mu 1.000000000 0.001816828
## sigma 0.001816828 1.000000000
library(rethinking)
post <- extract.samples(m4.1, n = 1e4)
head(post)
## mu sigma
## 1 154.7055 7.787593
## 2 154.2780 7.535810
## 3 154.2687 7.591791
## 4 154.6606 7.717358
## 5 154.4328 7.667167
## 6 154.0539 7.901500
precis(post)
## Mean StdDev |0.89 0.89|
## mu 154.60 0.41 153.93 155.25
## sigma 7.73 0.29 7.26 8.20
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
post <- mvrnorm(n = 1e4,
mu = coef(m4.1),
Sigma = vcov(m4.1))
m4.1_logsigma <-
map(
alist(
height ~ dnorm(mu, exp(log_sigma)),
mu ~ dnorm(178, 20),
log_sigma ~ dnorm(2, 10)
), data = HowellAdults)
m4.1_logsimga.post <-
extract.samples(m4.1_logsigma) %>%
mutate(sigma = exp(log_sigma))
xyplot(height ~ weight, data = HowellAdults)
# load data again, since it's a long way back
library(rethinking)
data(Howell1)
HowellAdults <-
Howell1 %>% filter(age > 18)
# fit model
m4.3 <-
map(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b * weight,
a ~ dnorm(156, 100),
b ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = HowellAdults)
# but note: this doesn't work with link()!
m4.3a <- map(alist(
height ~ dnorm(a + b * weight, sigma),
a ~ dnorm(178, 100),
b ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = HowellAdults)
precis(m4.3)
## Mean StdDev 5.5% 94.5%
## a 113.83 1.94 110.73 116.93
## b 0.91 0.04 0.84 0.97
## sigma 5.11 0.19 4.80 5.42
precis(m4.3, corr = TRUE)
## Mean StdDev 5.5% 94.5% a b sigma
## a 113.83 1.94 110.73 116.93 1.00 -0.99 0
## b 0.91 0.04 0.84 0.97 -0.99 1.00 0
## sigma 5.11 0.19 4.80 5.42 0.00 0.00 1
Centering weight:
HowellAdults <-
HowellAdults %>%
mutate(weight.c = weight - mean(weight))
m4.4 <-
map(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b * weight.c,
a ~ dnorm(178, 100),
b ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = HowellAdults)
precis(m4.4, corr = TRUE)
## Mean StdDev 5.5% 94.5% a b sigma
## a 154.64 0.27 154.21 155.08 1 0 0
## b 0.91 0.04 0.84 0.97 0 1 0
## sigma 5.11 0.19 4.80 5.42 0 0 1
# lattice with custom panel function to overlay multiple things
xyplot(height ~ weight, data = HowellAdults,
panel = function(x, y, ...) {
panel.abline(a = coef(m4.3)["a"], b = coef(m4.3)["b"])
panel.xyplot(x, y, ...)
}
)
# using plotFun() with add = TRUE to plot a function on top of points
xyplot(height ~ weight, data = HowellAdults)
plotFun(a + b * x ~ x, a = coef(m4.3)["a"], b = coef(m4.3)["b"], add = TRUE, col = "red")
# creating a separate function for use with plotFun()
xyplot(height ~ weight, data = HowellAdults)
line.fit <- function(x, a = coef(m4.3)["a"], b = coef(m4.3)["b"]) {
a + b * x
}
plotFun(line.fit(height) ~ height, add = TRUE, col = "red")
m4.3.post <- extract.samples(m4.3)
head(m4.3.post)
## a b sigma
## 1 116.7695 0.8375736 5.229714
## 2 112.0758 0.9377538 5.354243
## 3 114.1854 0.8955245 4.986353
## 4 117.6430 0.8187631 5.056452
## 5 115.8293 0.8614108 5.221715
## 6 113.2485 0.9198923 5.025354
## R code 4.48
HowellSmall <- Howell1 %>% sample(10)
m4.4small <-
map(alist(
height ~ dnorm(mu, sigma),
mu <- a + b * weight,
a ~ dnorm(178, 100),
b ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = HowellSmall)
# extract 50 samples from the posterior
m4.4small.post <- extract.samples(m4.4small, n = 50)
# display raw data and sample size
xyplot(height ~ weight, data = HowellSmall,
xlim = c(30, 65),
ylim = c(130, 180),
col = rangi2,
main = concat("N = ", 10),
panel = function(x, y, ...) {
panel.xyplot(x, y, ...)
# plot the lines, with transparency
for (i in 1:nrow(m4.4small.post)) {
panel.abline(a = m4.4small.post$a[i], b = m4.4small.post$b[i], col = "black",
alpha = 0.1)
}
}
)
m4.4small.post <-
m4.4small.post %>%
mutate(mu_at_50 = a + b * 50)
densityplot( ~ mu_at_50, data = m4.4small.post,
col = rangi2, lwd = 2, xlab = expression(paste(mu, " | weight=50")))
HPDI(m4.4small.post$mu_at_50, prob = 0.89)
## |0.89 0.89|
## 161.1440 167.9631
mu <- link(m4.3)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(mu)
## num [1:1000, 1:346] 157 157 157 157 157 ...
# define sequence of weights to compute predictions for
# these values will be on the horizontal axis
m4.3.pred <-
data.frame(weight = seq(from = 25, to = 70, by = 1))
# use link to compute mu
# for each sample from posterior
# and for each weight in weight.seq
mu <-
link(
m4.3,
data = m4.3.pred,
)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(mu)
## num [1:1000, 1:46] 137 137 135 138 138 ...
# The shape of mu coming out of link() is backwards for lattice
# t() flips rows and columns to make it work
xyplot(t(mu) ~ weight, data = m4.3.pred, alpha = 0.1, col = rangi2, pch = 16)
# summarize the distribution of mu
mu.mean <- apply(mu, 2, mean)
mu.HPDI <- apply(mu, 2, HPDI, prob = 0.89)
# plot raw data
# fading out points to make line and interval more visible
xyplot(height ~ weight, data = HowellAdults, col = rangi2, alpha = 0.5)
# plot the MAP line, aka the mean mu for each weight
plotPoints(mu.mean ~ weight, data = m4.3.pred, type = "l", add = TRUE)
# plot a shaded region for 89% HPDI
# need to figure out easiest way to do this in lattice
# author's shade() function only works with his base graphics
Here’s roughly how link() works:
post <- extract.samples(m4.3)
mu.link <- function(weight)
post$a + post$b * weight
weight.seq <- seq(from = 25, to = 70, by = 1)
mu <- sapply(weight.seq, mu.link)
mu.mean <- apply(mu, 2, mean)
mu.HPDI <- apply(mu, 2, HPDI, prob = 0.89)
sim.height <- sim(m4.3, data = m4.3.pred)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(sim.height)
## num [1:1000, 1:46] 138 132 136 132 148 ...
height.PI <- apply(sim.height, 2, PI, prob = 0.89)
# plot raw data
xyplot(height ~ weight, HowellAdults, col = rangi2, alpha = 0.5)
# draw MAP line
plotPoints(mu.mean ~ weight, data = m4.3.pred,
col = "blue", type = "l", add = TRUE)
# need a replacement for shade() that works with lattice.
# draw HPDI region for line
# shade(mu.HPDI, weight.seq)
plotPoints(mu.HPDI[1,] ~ weight, data = m4.3.pred,
col = "navy", type = "l", add = TRUE)
plotPoints(mu.HPDI[2,] ~ weight, data = m4.3.pred,
col = "navy", type = "l", add = TRUE)
# draw PI region for simulated heights
# shade(height.PI, weight.seq)
plotPoints(height.PI[1, ] ~ weight, data = m4.3.pred,
col = "red", type = "l", add = TRUE)
plotPoints(height.PI[2, ] ~ weight, data = m4.3.pred,
col = "red", type = "l", add = TRUE)
sim.height <- sim(m4.3, data = m4.3.pred, n = 1e4)
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
height.PI <- apply(sim.height, 2, PI, prob = 0.89)
This is roughly how sim() works.
post <- extract.samples(m4.3)
weight.seq <- 25:70
sim.height <- sapply(weight.seq, function(weight)
rnorm(
n = nrow(post),
mean = post$a + post$b * weight,
sd = post$sigma
))
height.PI <- apply(sim.height, 2, PI, prob = 0.89)
library(rethinking)
data(Howell1)
str(Howell1)
## 'data.frame': 544 obs. of 4 variables:
## $ height: num 152 140 137 157 145 ...
## $ weight: num 47.8 36.5 31.9 53 41.3 ...
## $ age : num 63 63 65 41 51 35 32 27 19 54 ...
## $ male : int 1 0 0 1 0 1 0 1 0 1 ...
require(mosaic) # for zscore()
Howell1 <-
Howell1 %>%
mutate(
weight.s = zscore(weight),
weight.s2 = weight.s^2
)
m4.5 <- map(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b1 * weight.s + b2 * weight.s2,
a ~ dnorm(178, 100),
b1 ~ dnorm(0, 10),
b2 ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = Howell1
)
precis(m4.5)
## Mean StdDev 5.5% 94.5%
## a 146.66 0.37 146.07 147.26
## b1 21.40 0.29 20.94 21.86
## b2 -8.42 0.28 -8.86 -7.97
## sigma 5.75 0.17 5.47 6.03
m4.5.pred <-
data_frame(
weight.s = seq(from = -2.2, to = 2, length.out = 30),
weight.s2 = weight.s^2
)
mu <- link(m4.5, data = m4.5.pred)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
sim.height <- sim(m4.5, data = m4.5.pred)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mu.PI <- apply(mu, 2, PI, prob = 0.89)
m4.5.pred <-
m4.5.pred %>%
mutate(
mu.mean = apply(mu, 2, mean),
mu.lo = apply(mu, 2, PI)[1,],
mu.hi = apply(mu, 2, PI)[2,],
sim.lo = apply(sim.height, 2, PI)[1,],
sim.hi = apply(sim.height, 2, PI)[2,]
)
xyplot(sim.hi + mu.hi + mu.mean + mu.lo + sim.lo ~ weight.s,
data = m4.5.pred, type = "l", auto.key = list(lines = TRUE, points = FALSE))
plotPoints(height ~ weight.s, data = Howell1, col = rangi2, alpha = 0.5, add = TRUE)
ggplot(aes(x = weight.s), data = m4.5.pred) +
geom_point(aes(y = height), data = Howell1, color = rangi2) +
geom_line(aes(y = mu.lo), color = "navy") +
geom_line(aes(y = mu.hi), color = "navy") +
geom_line(aes(y = sim.lo), color = "red") +
geom_line(aes(y = sim.hi), color = "red")
Howell1 <-
Howell1 %>% mutate(weight.s3 = weight.s^3)
m4.6 <- map(
alist(
height ~ dnorm(mu, sigma),
mu <- a + b1 * weight.s + b2 * weight.s2 + b3 * weight.s3,
a ~ dnorm(178, 100),
b1 ~ dnorm(0, 10),
b2 ~ dnorm(0, 10),
b3 ~ dnorm(0, 10),
sigma ~ dunif(0, 50)
),
data = Howell1
)
xyplot(
height ~ weight, data = Howell1,
col = rangi2, alpha = 0.5)
xyplot(height ~ weight, data = Howell1,
col = rangi2,
scales=list(
x = list(at = round((-2:2) * sd(Howell1$weight) + mean(Howell1$weight), 1))
)
)
## R code 4.73
xyplot(height ~ weight, data = Howell1,
col = rangi2, alpha = 0.4)